data(Prestige)
#add column name to first column:
Prestige <- cbind(as_tibble(rownames(Prestige)), Prestige) %>%
rename(job = value) #can't wrap in as_tibble to drop row names as this messes up the labeling on Avplots
## Warning: Calling `as_tibble()` on a vector is discouraged, because the behavior is likely to change in the future. Use `tibble::enframe(name = NULL)` instead.
## This warning is displayed once per session.
Prestige %>%
kable() %>%
kable_styling()
| job | education | income | women | prestige | census | type | |
|---|---|---|---|---|---|---|---|
| gov.administrators | gov.administrators | 13.11 | 12351 | 11.16 | 68.8 | 1113 | prof |
| general.managers | general.managers | 12.26 | 25879 | 4.02 | 69.1 | 1130 | prof |
| accountants | accountants | 12.77 | 9271 | 15.70 | 63.4 | 1171 | prof |
| purchasing.officers | purchasing.officers | 11.42 | 8865 | 9.11 | 56.8 | 1175 | prof |
| chemists | chemists | 14.62 | 8403 | 11.68 | 73.5 | 2111 | prof |
| physicists | physicists | 15.64 | 11030 | 5.13 | 77.6 | 2113 | prof |
| biologists | biologists | 15.09 | 8258 | 25.65 | 72.6 | 2133 | prof |
| architects | architects | 15.44 | 14163 | 2.69 | 78.1 | 2141 | prof |
| civil.engineers | civil.engineers | 14.52 | 11377 | 1.03 | 73.1 | 2143 | prof |
| mining.engineers | mining.engineers | 14.64 | 11023 | 0.94 | 68.8 | 2153 | prof |
| surveyors | surveyors | 12.39 | 5902 | 1.91 | 62.0 | 2161 | prof |
| draughtsmen | draughtsmen | 12.30 | 7059 | 7.83 | 60.0 | 2163 | prof |
| computer.programers | computer.programers | 13.83 | 8425 | 15.33 | 53.8 | 2183 | prof |
| economists | economists | 14.44 | 8049 | 57.31 | 62.2 | 2311 | prof |
| psychologists | psychologists | 14.36 | 7405 | 48.28 | 74.9 | 2315 | prof |
| social.workers | social.workers | 14.21 | 6336 | 54.77 | 55.1 | 2331 | prof |
| lawyers | lawyers | 15.77 | 19263 | 5.13 | 82.3 | 2343 | prof |
| librarians | librarians | 14.15 | 6112 | 77.10 | 58.1 | 2351 | prof |
| vocational.counsellors | vocational.counsellors | 15.22 | 9593 | 34.89 | 58.3 | 2391 | prof |
| ministers | ministers | 14.50 | 4686 | 4.14 | 72.8 | 2511 | prof |
| university.teachers | university.teachers | 15.97 | 12480 | 19.59 | 84.6 | 2711 | prof |
| primary.school.teachers | primary.school.teachers | 13.62 | 5648 | 83.78 | 59.6 | 2731 | prof |
| secondary.school.teachers | secondary.school.teachers | 15.08 | 8034 | 46.80 | 66.1 | 2733 | prof |
| physicians | physicians | 15.96 | 25308 | 10.56 | 87.2 | 3111 | prof |
| veterinarians | veterinarians | 15.94 | 14558 | 4.32 | 66.7 | 3115 | prof |
| osteopaths.chiropractors | osteopaths.chiropractors | 14.71 | 17498 | 6.91 | 68.4 | 3117 | prof |
| nurses | nurses | 12.46 | 4614 | 96.12 | 64.7 | 3131 | prof |
| nursing.aides | nursing.aides | 9.45 | 3485 | 76.14 | 34.9 | 3135 | bc |
| physio.therapsts | physio.therapsts | 13.62 | 5092 | 82.66 | 72.1 | 3137 | prof |
| pharmacists | pharmacists | 15.21 | 10432 | 24.71 | 69.3 | 3151 | prof |
| medical.technicians | medical.technicians | 12.79 | 5180 | 76.04 | 67.5 | 3156 | wc |
| commercial.artists | commercial.artists | 11.09 | 6197 | 21.03 | 57.2 | 3314 | prof |
| radio.tv.announcers | radio.tv.announcers | 12.71 | 7562 | 11.15 | 57.6 | 3337 | wc |
| athletes | athletes | 11.44 | 8206 | 8.13 | 54.1 | 3373 | NA |
| secretaries | secretaries | 11.59 | 4036 | 97.51 | 46.0 | 4111 | wc |
| typists | typists | 11.49 | 3148 | 95.97 | 41.9 | 4113 | wc |
| bookkeepers | bookkeepers | 11.32 | 4348 | 68.24 | 49.4 | 4131 | wc |
| tellers.cashiers | tellers.cashiers | 10.64 | 2448 | 91.76 | 42.3 | 4133 | wc |
| computer.operators | computer.operators | 11.36 | 4330 | 75.92 | 47.7 | 4143 | wc |
| shipping.clerks | shipping.clerks | 9.17 | 4761 | 11.37 | 30.9 | 4153 | wc |
| file.clerks | file.clerks | 12.09 | 3016 | 83.19 | 32.7 | 4161 | wc |
| receptionsts | receptionsts | 11.04 | 2901 | 92.86 | 38.7 | 4171 | wc |
| mail.carriers | mail.carriers | 9.22 | 5511 | 7.62 | 36.1 | 4172 | wc |
| postal.clerks | postal.clerks | 10.07 | 3739 | 52.27 | 37.2 | 4173 | wc |
| telephone.operators | telephone.operators | 10.51 | 3161 | 96.14 | 38.1 | 4175 | wc |
| collectors | collectors | 11.20 | 4741 | 47.06 | 29.4 | 4191 | wc |
| claim.adjustors | claim.adjustors | 11.13 | 5052 | 56.10 | 51.1 | 4192 | wc |
| travel.clerks | travel.clerks | 11.43 | 6259 | 39.17 | 35.7 | 4193 | wc |
| office.clerks | office.clerks | 11.00 | 4075 | 63.23 | 35.6 | 4197 | wc |
| sales.supervisors | sales.supervisors | 9.84 | 7482 | 17.04 | 41.5 | 5130 | wc |
| commercial.travellers | commercial.travellers | 11.13 | 8780 | 3.16 | 40.2 | 5133 | wc |
| sales.clerks | sales.clerks | 10.05 | 2594 | 67.82 | 26.5 | 5137 | wc |
| newsboys | newsboys | 9.62 | 918 | 7.00 | 14.8 | 5143 | NA |
| service.station.attendant | service.station.attendant | 9.93 | 2370 | 3.69 | 23.3 | 5145 | bc |
| insurance.agents | insurance.agents | 11.60 | 8131 | 13.09 | 47.3 | 5171 | wc |
| real.estate.salesmen | real.estate.salesmen | 11.09 | 6992 | 24.44 | 47.1 | 5172 | wc |
| buyers | buyers | 11.03 | 7956 | 23.88 | 51.1 | 5191 | wc |
| firefighters | firefighters | 9.47 | 8895 | 0.00 | 43.5 | 6111 | bc |
| policemen | policemen | 10.93 | 8891 | 1.65 | 51.6 | 6112 | bc |
| cooks | cooks | 7.74 | 3116 | 52.00 | 29.7 | 6121 | bc |
| bartenders | bartenders | 8.50 | 3930 | 15.51 | 20.2 | 6123 | bc |
| funeral.directors | funeral.directors | 10.57 | 7869 | 6.01 | 54.9 | 6141 | bc |
| babysitters | babysitters | 9.46 | 611 | 96.53 | 25.9 | 6147 | NA |
| launderers | launderers | 7.33 | 3000 | 69.31 | 20.8 | 6162 | bc |
| janitors | janitors | 7.11 | 3472 | 33.57 | 17.3 | 6191 | bc |
| elevator.operators | elevator.operators | 7.58 | 3582 | 30.08 | 20.1 | 6193 | bc |
| farmers | farmers | 6.84 | 3643 | 3.60 | 44.1 | 7112 | NA |
| farm.workers | farm.workers | 8.60 | 1656 | 27.75 | 21.5 | 7182 | bc |
| rotary.well.drillers | rotary.well.drillers | 8.88 | 6860 | 0.00 | 35.3 | 7711 | bc |
| bakers | bakers | 7.54 | 4199 | 33.30 | 38.9 | 8213 | bc |
| slaughterers.1 | slaughterers.1 | 7.64 | 5134 | 17.26 | 25.2 | 8215 | bc |
| slaughterers.2 | slaughterers.2 | 7.64 | 5134 | 17.26 | 34.8 | 8215 | bc |
| canners | canners | 7.42 | 1890 | 72.24 | 23.2 | 8221 | bc |
| textile.weavers | textile.weavers | 6.69 | 4443 | 31.36 | 33.3 | 8267 | bc |
| textile.labourers | textile.labourers | 6.74 | 3485 | 39.48 | 28.8 | 8278 | bc |
| tool.die.makers | tool.die.makers | 10.09 | 8043 | 1.50 | 42.5 | 8311 | bc |
| machinists | machinists | 8.81 | 6686 | 4.28 | 44.2 | 8313 | bc |
| sheet.metal.workers | sheet.metal.workers | 8.40 | 6565 | 2.30 | 35.9 | 8333 | bc |
| welders | welders | 7.92 | 6477 | 5.17 | 41.8 | 8335 | bc |
| auto.workers | auto.workers | 8.43 | 5811 | 13.62 | 35.9 | 8513 | bc |
| aircraft.workers | aircraft.workers | 8.78 | 6573 | 5.78 | 43.7 | 8515 | bc |
| electronic.workers | electronic.workers | 8.76 | 3942 | 74.54 | 50.8 | 8534 | bc |
| radio.tv.repairmen | radio.tv.repairmen | 10.29 | 5449 | 2.92 | 37.2 | 8537 | bc |
| sewing.mach.operators | sewing.mach.operators | 6.38 | 2847 | 90.67 | 28.2 | 8563 | bc |
| auto.repairmen | auto.repairmen | 8.10 | 5795 | 0.81 | 38.1 | 8581 | bc |
| aircraft.repairmen | aircraft.repairmen | 10.10 | 7716 | 0.78 | 50.3 | 8582 | bc |
| railway.sectionmen | railway.sectionmen | 6.67 | 4696 | 0.00 | 27.3 | 8715 | bc |
| electrical.linemen | electrical.linemen | 9.05 | 8316 | 1.34 | 40.9 | 8731 | bc |
| electricians | electricians | 9.93 | 7147 | 0.99 | 50.2 | 8733 | bc |
| construction.foremen | construction.foremen | 8.24 | 8880 | 0.65 | 51.1 | 8780 | bc |
| carpenters | carpenters | 6.92 | 5299 | 0.56 | 38.9 | 8781 | bc |
| masons | masons | 6.60 | 5959 | 0.52 | 36.2 | 8782 | bc |
| house.painters | house.painters | 7.81 | 4549 | 2.46 | 29.9 | 8785 | bc |
| plumbers | plumbers | 8.33 | 6928 | 0.61 | 42.9 | 8791 | bc |
| construction.labourers | construction.labourers | 7.52 | 3910 | 1.09 | 26.5 | 8798 | bc |
| pilots | pilots | 12.27 | 14032 | 0.58 | 66.1 | 9111 | prof |
| train.engineers | train.engineers | 8.49 | 8845 | 0.00 | 48.9 | 9131 | bc |
| bus.drivers | bus.drivers | 7.58 | 5562 | 9.47 | 35.9 | 9171 | bc |
| taxi.drivers | taxi.drivers | 7.93 | 4224 | 3.59 | 25.1 | 9173 | bc |
| longshoremen | longshoremen | 8.37 | 4753 | 0.00 | 26.1 | 9313 | bc |
| typesetters | typesetters | 10.00 | 6462 | 13.58 | 42.2 | 9511 | bc |
| bookbinders | bookbinders | 8.55 | 3617 | 70.87 | 35.2 | 9517 | bc |
prestige.mod.2 = lm(prestige ~ education + income + type, data = Prestige)
#all predictors and overall
residualPlots(prestige.mod.2)
## Test stat Pr(>|Test stat|)
## education -0.6836 0.495942
## income -2.8865 0.004854 **
## type
## Tukey test -2.6104 0.009043 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# no predictors, only overall
# residualPlots(prestige.mod.2, ~1)
#only one predictor
# residualPlots(prestige.mod.2, ~education , fitted = F)
Variation of basic residual plot above:
#Univariate view:
marginalModelPlots(prestige.mod.2)
## Warning in mmps(...): Interactions and/or factors skipped
Added-variable plots (partial-regression plots). Partial relationship adjusted for all other variables in the model (as opposed to univariate above)
avPlots(prestige.mod.2)#, id.n=2)#, id.cex=.6)
#page 295
data(Duncan) #US occupation data
Duncan
## type income education prestige
## accountant prof 62 86 82
## pilot prof 72 76 83
## architect prof 75 92 90
## author prof 55 90 76
## chemist prof 64 86 90
## minister prof 21 84 87
## professor prof 64 93 93
## dentist prof 80 100 90
## reporter wc 67 87 52
## engineer prof 72 86 88
## undertaker prof 42 74 57
## lawyer prof 76 98 89
## physician prof 76 97 97
## welfare.worker prof 41 84 59
## teacher prof 48 91 73
## conductor wc 76 34 38
## contractor prof 53 45 76
## factory.owner prof 60 56 81
## store.manager prof 42 44 45
## banker prof 78 82 92
## bookkeeper wc 29 72 39
## mail.carrier wc 48 55 34
## insurance.agent wc 55 71 41
## store.clerk wc 29 50 16
## carpenter bc 21 23 33
## electrician bc 47 39 53
## RR.engineer bc 81 28 67
## machinist bc 36 32 57
## auto.repairman bc 22 22 26
## plumber bc 44 25 29
## gas.stn.attendant bc 15 29 10
## coal.miner bc 7 7 15
## streetcar.motorman bc 42 26 19
## taxi.driver bc 9 19 10
## truck.driver bc 21 15 13
## machine.operator bc 21 20 24
## barber bc 16 26 20
## bartender bc 16 28 7
## shoe.shiner bc 9 17 3
## cook bc 14 22 16
## soda.clerk bc 12 30 6
## watchman bc 17 25 11
## janitor bc 7 20 8
## policeman bc 34 47 41
## waiter bc 8 32 10
mod.duncan = lm(prestige ~ income + education, data = Duncan)
qqPlot(mod.duncan, id.n=3)
## minister reporter
## 6 9
p 296
outlierTest(mod.duncan)
## No Studentized residuals with Bonferroni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferroni p
## minister 3.134519 0.0031772 0.14297
hat-values
suppressWarnings(influenceIndexPlot(mod.duncan , id.n=3))
suppressWarnings(influencePlot(mod.duncan, id.n=3))
## StudRes Hat CookD
## minister 3.1345186 0.17305816 0.56637974
## reporter -2.3970224 0.05439356 0.09898456
## conductor -1.7040324 0.19454165 0.22364122
## RR.engineer 0.8089221 0.26908963 0.08096807
mod.duncan.2 = update(mod.duncan, subset = rownames(Duncan) != "minister")
compareCoefs(mod.duncan, mod.duncan.2)
## Calls:
## 1: lm(formula = prestige ~ income + education, data = Duncan)
## 2: lm(formula = prestige ~ income + education, data = Duncan, subset =
## rownames(Duncan) != "minister")
##
## Model 1 Model 2
## (Intercept) -6.06 -6.63
## SE 4.27 3.89
##
## income 0.599 0.732
## SE 0.120 0.117
##
## education 0.5458 0.4330
## SE 0.0983 0.0963
##
mod.duncan.3 = update(mod.duncan, subset = !(rownames(Duncan) %in% c("conductor","minister")))
compareCoefs(mod.duncan, mod.duncan.2, mod.duncan.3, se=F)
## Calls:
## 1: lm(formula = prestige ~ income + education, data = Duncan)
## 2: lm(formula = prestige ~ income + education, data = Duncan, subset =
## rownames(Duncan) != "minister")
## 3: lm(formula = prestige ~ income + education, data = Duncan, subset =
## !(rownames(Duncan) %in% c("conductor", "minister")))
##
## Model 1 Model 2 Model 3
## (Intercept) -6.06 -6.63 -6.41
## income 0.599 0.732 0.867
## education 0.546 0.433 0.332
p 300
suppressWarnings(avPlots(mod.duncan, id.n = 3))
dfbs_duncan = as_tibble(data.frame(rownames = row.names(dfbetas(mod.duncan)),dfbetas(mod.duncan)))
dfbs_duncan
## # A tibble: 45 x 4
## rownames X.Intercept. income education
## <fct> <dbl> <dbl> <dbl>
## 1 accountant -0.0225 0.000666 0.0359
## 2 pilot -0.0254 0.0509 -0.00812
## 3 architect -0.00919 0.00648 0.00562
## 4 author -0.0000472 -0.0000602 0.000140
## 5 chemist -0.0658 0.0170 0.0868
## 6 minister 0.145 -1.22 1.26
## 7 professor -0.0763 -0.0137 0.122
## 8 dentist 0.0819 -0.0463 -0.0529
## 9 reporter 0.217 -0.102 -0.222
## 10 engineer -0.0312 0.0289 0.0159
## # … with 35 more rows
dfbs_duncan %>%
ggplot(aes(x = income, y = education))+
geom_point()+
geom_text(data = dfbs_duncan %>% filter(income < -.5 | income > .4),aes(income,education,label=rownames))
ggplotly(dfbs_duncan %>%
ggplot(aes(x = income, y = education))+
geom_point(aes(text = rownames))
)
## Warning: Ignoring unknown aesthetics: text
#manual filter based on graph above to get rownames:
dfbs_duncan %>%
filter(education > 1)
## # A tibble: 1 x 4
## rownames X.Intercept. income education
## <fct> <dbl> <dbl> <dbl>
## 1 minister 0.145 -1.22 1.26
p304
mod.ornstein = lm(interlocks +1 ~ log(assets) + nation + sector, data = Ornstein)
# par(mfrow=c(1,2))
qqPlot(mod.ornstein, id.n=0)
## [1] 2 44
plot(density(rstudent(mod.ornstein)))
boxCox(mod.ornstein, lambda = seq(0,.6, by=.1))
#max lambda approx .22.
similar to box Cox above but produces numeric instead of graphical output:
p1 = powerTransform(mod.ornstein)
## Warning in model.matrix.default(mt, mf, contrasts): non-list contrasts
## argument ignored
summary(p1)
## bcPower Transformation to Normality
## Est Power Rounded Pwr Wald Lwr Bnd Wald Upr Bnd
## Y1 0.2227 0.22 0.126 0.3193
##
## Likelihood ratio test that transformation parameter is equal to 0
## (log transformation)
## LRT df pval
## LR test, lambda = (0) 19.75696 1 8.7941e-06
##
## Likelihood ratio test that no transformation is needed
## LRT df pval
## LR test, lambda = (1) 243.4049 1 < 2.22e-16
# Ornstein1_old = transform(Ornstein,
# y1=bcPower(interlocks +1, coef(p1)),
# y1round = bcPower(interlocks+1, coef(p1,round=T))
# ) #from the book
Ornstein1 = Ornstein %>%
mutate(#y1=bcPower(interlocks +1, coef(p1)), #not used. same results as rounded below
interlocks_transform = bcPower(interlocks+1, lambda = coef(p1,round=T))
)
Ornstein1
## assets sector nation interlocks interlocks_transform
## 1 147670 BNK CAN 87 7.6797325
## 2 133000 BNK CAN 107 8.2475809
## 3 113230 BNK CAN 94 7.8889363
## 4 85418 BNK CAN 48 6.1920496
## 5 75477 BNK CAN 66 6.9628392
## 6 40742 FIN CAN 69 7.0751001
## 7 40140 TRN CAN 46 6.0933783
## 8 26866 BNK CAN 16 3.9486476
## 9 24500 TRN CAN 77 7.3571781
## 10 23700 MIN US 6 2.4355835
## 11 23482 FIN CAN 18 4.1602769
## 12 21512 FIN CAN 29 5.0864594
## 13 20780 MIN US 36 5.5443152
## 14 18688 FIN CAN 20 4.3552382
## 15 18286 BNK CAN 13 3.5915568
## 16 17910 MIN US 6 2.4355835
## 17 17784 FIN CAN 31 5.2250875
## 18 16631 FIN CAN 27 4.9404501
## 19 16458 FIN CAN 23 4.6222140
## 20 15280 MIN US 13 3.5915568
## 21 15140 FIN CAN 32 5.2918893
## 22 14362 FIN CAN 28 5.0144334
## 23 14163 AGR CAN 4 1.9355970
## 24 13820 HLD CAN 42 5.8858090
## 25 13787 FIN CAN 17 4.0567484
## 26 12810 TRN CAN 40 5.7763420
## 27 12080 MIN US 29 5.0864594
## 28 11250 MIN CAN 29 5.0864594
## 29 11090 MAN US 21 4.4473483
## 30 10580 MIN OTH 3 1.6240827
## 31 10570 MAN CAN 32 5.2918893
## 32 10568 FIN CAN 29 5.0864594
## 33 10320 MIN CAN 40 5.7763420
## 34 10110 TRN US 5 2.2018668
## 35 9044 WOD CAN 33 5.3571357
## 36 8395 FIN CAN 21 4.4473483
## 37 8182 FIN US 18 4.1602769
## 38 7994 TRN US 18 4.1602769
## 39 7930 FIN CAN 13 3.5915568
## 40 7877 MAN US 2 1.2446483
## 41 7564 FIN US 22 4.5362592
## 42 7510 FIN US 13 3.5915568
## 43 7287 FIN US 2 1.2446483
## 44 7018 BNK CAN 0 0.0000000
## 45 6629 MIN US 18 4.1602769
## 46 6571 FIN CAN 9 3.0081277
## 47 6498 TRN CAN 31 5.2250875
## 48 6407 AGR CAN 16 3.9486476
## 49 6286 MIN CAN 28 5.0144334
## 50 5932 FIN CAN 34 5.4209070
## 51 5704 MIN CAN 33 5.3571357
## 52 5479 MER CAN 7 2.6446249
## 53 5437 TRN US 12 3.4592772
## 54 5429 MER US 15 3.8354850
## 55 5366 TRN US 13 3.5915568
## 56 5035 MIN US 16 3.9486476
## 57 5021 MIN OTH 27 4.9404501
## 58 4980 AGR CAN 12 3.4592772
## 59 4838 TRN CAN 11 3.3188352
## 60 4634 MIN US 0 0.0000000
## 61 4592 TRN US 16 3.9486476
## 62 4390 TRN CAN 17 4.0567484
## 63 4304 WOD CAN 55 6.5144598
## 64 4298 AGR OTH 15 3.8354850
## 65 4227 FIN CAN 44 5.9913871
## 66 4210 MIN US 18 4.1602769
## 67 4154 FIN OTH 20 4.3552382
## 68 4100 MAN US 19 4.2596528
## 69 4099 MAN US 9 3.0081277
## 70 4088 FIN CAN 12 3.4592772
## 71 3960 CON OTH 17 4.0567484
## 72 3896 AGR CAN 15 3.8354850
## 73 3879 WOD CAN 27 4.9404501
## 74 3673 MER CAN 30 5.1566426
## 75 3654 MIN OTH 27 4.9404501
## 76 3631 TRN US 12 3.4592772
## 77 3606 MIN UK 11 3.3188352
## 78 3570 MER CAN 28 5.0144334
## 79 3561 MIN US 8 2.8342429
## 80 3274 TRN CAN 12 3.4592772
## 81 3152 MAN US 18 4.1602769
## 82 3058 WOD UK 23 4.6222140
## 83 2958 WOD CAN 51 6.3343437
## 84 2927 MIN OTH 35 5.4832774
## 85 2878 MER CAN 8 2.8342429
## 86 2814 AGR CAN 43 5.9390644
## 87 2807 MIN CAN 4 1.9355970
## 88 2801 WOD CAN 18 4.1602769
## 89 2786 AGR OTH 20 4.3552382
## 90 2757 MIN UK 13 3.5915568
## 91 2667 MIN UK 22 4.5362592
## 92 2625 AGR UK 10 3.1689789
## 93 2566 MER US 1 0.7494995
## 94 2549 HLD US 6 2.4355835
## 95 2488 AGR CAN 30 5.1566426
## 96 2285 MIN US 6 2.4355835
## 97 2281 WOD US 11 3.3188352
## 98 2182 AGR US 20 4.3552382
## 99 2165 MIN US 7 2.6446249
## 100 2164 MIN OTH 13 3.5915568
## 101 2141 MIN US 0 0.0000000
## 102 2108 MAN UK 14 3.7166837
## 103 2086 MIN US 19 4.2596528
## 104 2025 AGR CAN 4 1.9355970
## 105 1881 AGR CAN 2 1.2446483
## 106 1876 MER CAN 2 1.2446483
## 107 1841 WOD US 7 2.6446249
## 108 1656 MIN OTH 25 4.7860944
## 109 1655 MER CAN 29 5.0864594
## 110 1612 MER CAN 5 2.2018668
## 111 1603 MIN US 12 3.4592772
## 112 1601 WOD CAN 25 4.7860944
## 113 1591 AGR US 2 1.2446483
## 114 1583 HLD CAN 25 4.7860944
## 115 1561 MIN US 2 1.2446483
## 116 1520 MAN US 16 3.9486476
## 117 1511 WOD US 3 1.6240827
## 118 1487 MIN OTH 9 3.0081277
## 119 1482 MAN US 1 0.7494995
## 120 1477 MAN US 0 0.0000000
## 121 1469 MIN US 1 0.7494995
## 122 1434 MIN US 1 0.7494995
## 123 1427 HLD CAN 1 0.7494995
## 124 1416 MER CAN 6 2.4355835
## 125 1378 MIN US 12 3.4592772
## 126 1372 MIN US 5 2.2018668
## 127 1343 WOD UK 5 2.2018668
## 128 1337 MIN US 0 0.0000000
## 129 1335 MAN CAN 4 1.9355970
## 130 1315 MIN OTH 5 2.2018668
## 131 1235 MAN CAN 33 5.3571357
## 132 1172 AGR CAN 11 3.3188352
## 133 1154 MAN US 3 1.6240827
## 134 1154 HLD CAN 3 1.6240827
## 135 1112 AGR US 5 2.2018668
## 136 1060 AGR CAN 25 4.7860944
## 137 1027 MIN OTH 14 3.7166837
## 138 984 MER US 1 0.7494995
## 139 978 MAN US 0 0.0000000
## 140 953 MIN US 12 3.4592772
## 141 950 WOD CAN 18 4.1602769
## 142 943 MER US 11 3.3188352
## 143 904 MIN CAN 39 5.7200444
## 144 898 AGR UK 3 1.6240827
## 145 888 WOD CAN 2 1.2446483
## 146 848 WOD CAN 8 2.8342429
## 147 844 MAN US 0 0.0000000
## 148 839 TRN CAN 11 3.3188352
## 149 832 AGR CAN 13 3.5915568
## 150 830 MIN UK 1 0.7494995
## 151 816 MIN CAN 10 3.1689789
## 152 809 MAN UK 0 0.0000000
## 153 802 MAN CAN 0 0.0000000
## 154 798 AGR CAN 11 3.3188352
## 155 789 MIN UK 9 3.0081277
## 156 789 MAN US 6 2.4355835
## 157 782 MER CAN 11 3.3188352
## 158 780 MAN US 1 0.7494995
## 159 779 MER CAN 14 3.7166837
## 160 761 WOD US 1 0.7494995
## 161 751 AGR CAN 8 2.8342429
## 162 742 AGR CAN 7 2.6446249
## 163 727 AGR CAN 1 0.7494995
## 164 707 AGR US 9 3.0081277
## 165 704 HLD CAN 10 3.1689789
## 166 702 MAN CAN 3 1.6240827
## 167 690 WOD OTH 0 0.0000000
## 168 677 MAN CAN 12 3.4592772
## 169 638 MAN US 6 2.4355835
## 170 637 AGR US 1 0.7494995
## 171 636 MIN US 0 0.0000000
## 172 614 CON CAN 2 1.2446483
## 173 590 MAN US 2 1.2446483
## 174 589 MIN OTH 23 4.6222140
## 175 586 TRN CAN 10 3.1689789
## 176 575 AGR CAN 1 0.7494995
## 177 566 AGR US 0 0.0000000
## 178 559 MAN US 7 2.6446249
## 179 558 AGR CAN 14 3.7166837
## 180 552 AGR CAN 7 2.6446249
## 181 548 MIN US 5 2.2018668
## 182 540 MAN CAN 6 2.4355835
## 183 539 AGR CAN 9 3.0081277
## 184 533 TRN CAN 5 2.2018668
## 185 523 MAN US 8 2.8342429
## 186 519 MAN US 8 2.8342429
## 187 519 AGR CAN 0 0.0000000
## 188 516 TRN CAN 5 2.2018668
## 189 511 HLD CAN 0 0.0000000
## 190 510 MAN CAN 11 3.3188352
## 191 508 MAN OTH 1 0.7494995
## 192 497 AGR CAN 4 1.9355970
## 193 495 MAN CAN 0 0.0000000
## 194 494 MER CAN 8 2.8342429
## 195 488 MAN CAN 1 0.7494995
## 196 487 MAN CAN 8 2.8342429
## 197 471 MER US 0 0.0000000
## 198 456 MIN US 5 2.2018668
## 199 456 MAN US 0 0.0000000
## 200 444 AGR US 1 0.7494995
## 201 438 MAN UK 18 4.1602769
## 202 432 MAN US 1 0.7494995
## 203 432 MAN CAN 3 1.6240827
## 204 422 WOD US 11 3.3188352
## 205 407 MIN US 6 2.4355835
## 206 402 MAN US 0 0.0000000
## 207 391 AGR CAN 28 5.0144334
## 208 387 MER CAN 11 3.3188352
## 209 386 CON OTH 2 1.2446483
## 210 379 MAN US 16 3.9486476
## 211 376 MER CAN 5 2.2018668
## 212 375 MAN US 8 2.8342429
## 213 372 AGR US 8 2.8342429
## 214 370 AGR UK 3 1.6240827
## 215 364 TRN US 5 2.2018668
## 216 361 MIN US 2 1.2446483
## 217 359 AGR US 0 0.0000000
## 218 358 AGR US 0 0.0000000
## 219 352 WOD CAN 21 4.4473483
## 220 350 MAN US 1 0.7494995
## 221 345 MER US 8 2.8342429
## 222 332 MAN US 0 0.0000000
## 223 326 AGR CAN 3 1.6240827
## 224 326 AGR US 0 0.0000000
## 225 326 MAN CAN 28 5.0144334
## 226 325 MAN OTH 0 0.0000000
## 227 318 AGR CAN 2 1.2446483
## 228 312 MAN US 2 1.2446483
## 229 305 AGR UK 4 1.9355970
## 230 304 MER US 4 1.9355970
## 231 303 WOD UK 3 1.6240827
## 232 297 CON CAN 2 1.2446483
## 233 276 MIN CAN 9 3.0081277
## 234 270 MAN CAN 4 1.9355970
## 235 261 CON UK 1 0.7494995
## 236 256 MAN US 1 0.7494995
## 237 245 MIN CAN 11 3.3188352
## 238 241 MIN US 3 1.6240827
## 239 225 AGR US 6 2.4355835
## 240 225 MIN UK 8 2.8342429
## 241 220 AGR US 0 0.0000000
## 242 201 AGR US 5 2.2018668
## 243 200 MAN US 0 0.0000000
## 244 188 MAN US 0 0.0000000
## 245 160 AGR CAN 0 0.0000000
## 246 158 AGR CAN 5 2.2018668
## 247 119 AGR CAN 6 2.4355835
## 248 62 MIN US 0 0.0000000
# summary(Ornstein1)
mod.ornstein.trans = update(mod.ornstein, interlocks_transform ~., data = Ornstein1)
qqPlot(mod.ornstein.trans, id.n=0)
## [1] 44 60
plot(density(rstudent(mod.ornstein.trans)))
constructed variable plot p 307
mod.orstein.constr_var = update(mod.ornstein, .~. +boxCoxVariable(interlocks+1))
summary(mod.orstein.constr_var) #below is from book, but this is easier code and you just find that variable in table
##
## Call:
## lm(formula = interlocks + 1 ~ log(assets) + nation + sector +
## boxCoxVariable(interlocks + 1), data = Ornstein)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.9153 -3.9372 0.4998 4.1919 13.1209
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.39627 2.69620 -1.631 0.104338
## log(assets) 2.71779 0.37400 7.267 5.51e-12 ***
## nationOTH -0.60586 1.59413 -0.380 0.704251
## nationUK -2.01746 1.58843 -1.270 0.205317
## nationUS -4.75160 0.89731 -5.295 2.74e-07 ***
## sectorBNK -10.60540 2.88882 -3.671 0.000299 ***
## sectorCON -3.19282 2.79579 -1.142 0.254621
## sectorFIN 1.41405 1.76186 0.803 0.423029
## sectorHLD -1.71708 2.37789 -0.722 0.470956
## sectorMAN -0.04559 1.22084 -0.037 0.970241
## sectorMER 0.78174 1.56550 0.499 0.618002
## sectorMIN 1.98125 1.26012 1.572 0.117246
## sectorTRN 1.66995 1.70212 0.981 0.327560
## sectorWOD 2.67821 1.60017 1.674 0.095531 .
## boxCoxVariable(interlocks + 1) 0.61615 0.02421 25.452 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.806 on 233 degrees of freedom
## Multiple R-squared: 0.8771, Adjusted R-squared: 0.8697
## F-statistic: 118.7 on 14 and 233 DF, p-value: < 2.2e-16
# summary(mod.orstein.constr_var)$coef["boxCoxVariable(interlocks + 1)",]#drop = F]
# avPlots(mod.orstein.constr_var) #have to hit enter in console but produces all vars
avPlots(mod.orstein.constr_var, variable = "boxCoxVariable(interlocks + 1)")
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
## Warning in applyDefaults(id, defaults = list(method =
## list(abs(residuals(model, : unnamed id arguments, will be ignored
transforming predictors compononet-plus-residual plots (or partial-residual-plots)
prestige.mod.3 = update(prestige.mod.2, ~. - type + women)
crPlots(prestige.mod.3,order=2) #book says this is nearly identical to order=1 and CERES plots
crPlots(prestige.mod.3,order=1)
prestige.mod.4 = update(prestige.mod.3, .~. +log2(income) - income)
crPlots(prestige.mod.4,order=2)
prestige.mod.5 = update(prestige.mod.4, .~. - women + poly(women,2))
crPlots(prestige.mod.5,order=1)
summary(prestige.mod.4)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -110.96582409 14.84292810 -7.476006 3.269774e-11
## education 3.73050783 0.35438304 10.526767 8.730063e-18
## women 0.04689514 0.02989886 1.568459 1.199974e-01
## log2(income) 9.31466643 1.32651512 7.021907 2.895566e-10
summary(prestige.mod.5)$coef #why does poly have to coefs?
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -110.599683 13.9817384 -7.910296 4.159978e-12
## education 3.770068 0.3474857 10.849564 1.984736e-18
## log2(income) 9.360129 1.2992237 7.204402 1.261807e-10
## poly(women, 2)1 15.088276 9.3356690 1.616197 1.092997e-01
## poly(women, 2)2 15.871413 6.9704389 2.276960 2.498556e-02
p 314
residualPlots(mod.ornstein, ~1 ,fitted = T, id.n=0,quadratic=F,tests=F)
## Warning in plot.window(...): "id.n" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "id.n" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not
## a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "id.n" is not
## a graphical parameter
## Warning in box(...): "id.n" is not a graphical parameter
## Warning in title(...): "id.n" is not a graphical parameter
## Warning in plot.xy(xy.coords(x, y), type = type, ...): "id.n" is not a
## graphical parameter
spreadLevelPlot(mod.ornstein)
## Warning in spreadLevelPlot.lm(mod.ornstein):
## 16 negative fitted values removed
##
## Suggested power transformation: 0.5540406
p 315 collinearity
data(Ericksen)
Ericksen
## minority crime poverty language highschool housing city
## Alabama 26.1 49 18.9 0.2 43.5 7.6 state
## Alaska 5.7 62 10.7 1.7 17.5 23.6 state
## Arizona 18.9 81 13.2 3.2 27.6 8.1 state
## Arkansas 16.9 38 19.0 0.2 44.5 7.0 state
## California.R 24.3 73 10.4 5.0 26.0 11.8 state
## Colorado 15.2 73 10.1 1.2 21.4 9.2 state
## Connecticut 10.8 58 8.0 2.4 29.7 21.0 state
## Delaware 17.5 68 11.8 0.7 31.4 8.9 state
## Florida 22.3 81 13.4 3.6 33.3 10.1 state
## Georgia 27.6 55 16.6 0.3 43.6 10.2 state
## Hawaii 9.1 75 9.9 5.7 26.2 17.0 state
## Idaho 4.2 48 12.6 1.0 26.3 9.1 state
## Illinois.R 8.1 48 7.7 1.0 29.8 13.5 state
## Indiana.R 7.1 48 9.4 0.5 33.6 9.9 state
## Iowa 2.3 47 10.1 0.3 28.5 10.4 state
## Kansas 7.9 54 10.1 0.5 26.7 8.5 state
## Kentucky 7.7 34 17.6 0.2 46.9 10.6 state
## Louisiana 31.4 54 18.6 1.1 42.3 9.7 state
## Maine 0.7 44 13.0 1.0 31.3 19.5 state
## Maryland.R 16.7 58 6.8 0.8 28.2 10.5 state
## Massachusetts.R 3.8 53 8.5 2.1 27.4 26.9 state
## Michigan.R 7.0 61 8.7 0.7 29.9 9.4 state
## Minnesota 2.1 48 9.5 0.5 26.9 10.7 state
## Mississippi 35.8 34 23.9 0.2 45.2 7.2 state
## Missouri.R 7.8 45 11.2 0.3 34.9 9.1 state
## Montana 1.5 50 12.3 0.4 25.6 12.8 state
## Nebraska 4.8 43 10.7 0.5 26.6 9.7 state
## Nevada 13.0 88 8.7 1.6 24.5 11.7 state
## New.Hampshire 1.0 47 8.5 0.8 27.7 20.3 state
## New.Jersey 19.0 64 9.5 3.6 32.6 23.7 state
## New.Mexico 38.4 59 17.6 4.6 31.1 10.7 state
## New.York.R 8.0 48 8.9 1.3 29.3 21.6 state
## North.Carolina 23.1 46 14.8 0.2 45.2 8.2 state
## North.Dakota 1.0 30 12.6 0.5 33.6 15.1 state
## Ohio.R 8.9 52 9.6 0.5 32.1 11.3 state
## Oklahoma 8.6 50 13.4 0.5 34.0 8.0 state
## Oregon 3.9 60 10.7 0.8 24.4 7.9 state
## Pennsylvania.R 4.8 33 8.8 0.6 33.6 13.3 state
## Rhode.Island 4.9 59 10.3 3.2 38.9 29.6 state
## South.Carolina 31.0 53 16.6 0.2 46.3 7.9 state
## South.Dakota 0.9 32 16.9 0.4 32.1 12.0 state
## Tennessee 16.4 44 16.4 0.2 43.8 9.4 state
## Texas.R 30.6 55 15.0 4.7 38.7 7.7 state
## Utah 4.7 58 10.3 0.9 20.0 11.3 state
## Vermont 0.9 50 12.1 0.5 29.0 20.8 state
## Virginia 20.0 46 11.8 0.5 37.6 10.3 state
## Washington 5.4 69 9.8 1.0 22.4 9.4 state
## West.Virginia 3.9 25 15.0 0.2 44.0 9.0 state
## Wisconsin.R 1.7 45 7.9 0.4 29.5 12.8 state
## Wyoming 5.9 49 7.9 0.7 22.1 13.2 state
## Baltimore 55.5 100 22.9 0.7 51.6 23.3 city
## Boston 28.4 135 20.2 4.4 31.6 52.1 city
## Chicago 53.7 66 20.3 6.7 43.8 51.4 city
## Cleveland 46.7 101 22.1 1.6 49.1 36.4 city
## Dallas 41.6 118 14.2 3.1 31.5 12.9 city
## Detroit 65.4 106 21.9 1.6 45.8 18.6 city
## Houston 45.1 80 12.7 5.1 31.6 8.9 city
## Indianapolis 22.5 53 11.5 0.3 33.3 13.6 city
## Los.Angeles 44.4 100 16.4 12.7 31.4 15.0 city
## Milwaukee 27.2 65 13.8 1.6 46.4 27.2 city
## New.York.City 44.0 101 20.0 8.9 39.8 32.2 city
## Philadelphia 41.3 60 20.6 2.2 45.7 21.7 city
## Saint.Louis 46.7 143 21.8 0.5 51.8 40.9 city
## San.Diego 23.6 81 12.4 4.2 21.1 11.2 city
## San.Francisco 24.8 107 13.7 9.2 26.0 20.3 city
## Washington.DC 72.6 102 18.6 1.1 32.9 21.0 city
## conventional undercount
## Alabama 0 -0.04
## Alaska 100 3.35
## Arizona 18 2.48
## Arkansas 0 -0.74
## California.R 4 3.60
## Colorado 19 1.34
## Connecticut 0 -0.26
## Delaware 0 -0.16
## Florida 0 2.20
## Georgia 0 0.37
## Hawaii 29 1.46
## Idaho 56 1.53
## Illinois.R 0 1.69
## Indiana.R 0 -0.68
## Iowa 0 -0.59
## Kansas 14 0.94
## Kentucky 0 -1.41
## Louisiana 0 2.46
## Maine 40 2.06
## Maryland.R 0 2.03
## Massachusetts.R 4 -0.57
## Michigan.R 8 0.89
## Minnesota 11 1.57
## Mississippi 0 1.52
## Missouri.R 0 0.81
## Montana 75 1.81
## Nebraska 33 0.36
## Nevada 10 5.08
## New.Hampshire 0 -1.49
## New.Jersey 0 1.44
## New.Mexico 58 2.69
## New.York.R 0 -1.48
## North.Carolina 0 1.36
## North.Dakota 70 0.35
## Ohio.R 0 0.97
## Oklahoma 0 -0.12
## Oregon 13 0.93
## Pennsylvania.R 0 -0.78
## Rhode.Island 0 0.74
## South.Carolina 0 6.19
## South.Dakota 84 0.42
## Tennessee 0 -2.31
## Texas.R 1 0.27
## Utah 14 1.14
## Vermont 0 -1.12
## Virginia 0 1.11
## Washington 4 1.48
## West.Virginia 0 -0.69
## Wisconsin.R 9 1.45
## Wyoming 100 4.01
## Baltimore 0 6.15
## Boston 0 2.27
## Chicago 0 5.42
## Cleveland 0 5.01
## Dallas 0 8.18
## Detroit 0 4.33
## Houston 0 5.79
## Indianapolis 0 0.31
## Los.Angeles 0 7.52
## Milwaukee 0 3.17
## New.York.City 0 7.39
## Philadelphia 0 6.41
## Saint.Louis 0 3.60
## San.Diego 0 0.47
## San.Francisco 0 5.18
## Washington.DC 0 5.93
model_census = lm(undercount ~ . ,data=Ericksen)
summary(model_census)
##
## Call:
## lm(formula = undercount ~ ., data = Ericksen)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8356 -0.8033 -0.0553 0.7050 4.2467
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.611411 1.720843 -0.355 0.723678
## minority 0.079834 0.022609 3.531 0.000827 ***
## crime 0.030117 0.012998 2.317 0.024115 *
## poverty -0.178369 0.084916 -2.101 0.040117 *
## language 0.215125 0.092209 2.333 0.023200 *
## highschool 0.061290 0.044775 1.369 0.176415
## housing -0.034957 0.024630 -1.419 0.161259
## citystate -1.159982 0.770644 -1.505 0.137791
## conventional 0.036989 0.009253 3.997 0.000186 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.426 on 57 degrees of freedom
## Multiple R-squared: 0.7077, Adjusted R-squared: 0.6667
## F-statistic: 17.25 on 8 and 57 DF, p-value: 1.044e-12
model_census_2 = lm(undercount ~ . -highschool - poverty - housing - city,data=Ericksen)
summary(model_census_2)
##
## Call:
## lm(formula = undercount ~ . - highschool - poverty - housing -
## city, data = Ericksen)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8038 -0.9149 0.1019 0.7776 4.3541
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.975947 0.550062 -3.592 0.000655 ***
## minority 0.075188 0.014259 5.273 1.86e-06 ***
## crime 0.027154 0.010391 2.613 0.011280 *
## language 0.209353 0.086737 2.414 0.018811 *
## conventional 0.027296 0.007771 3.512 0.000842 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.467 on 61 degrees of freedom
## Multiple R-squared: 0.6691, Adjusted R-squared: 0.6475
## F-statistic: 30.84 on 4 and 61 DF, p-value: 4.781e-14
Check for collinearity (vif > 4)
vif(model_census) #can also be called on glms
## minority crime poverty language highschool
## 5.009065 3.343586 4.625178 1.635568 4.619169
## housing city conventional
## 1.871745 3.537750 1.691320
Ericksen %>% pull(undercount) %>% hist
model_census_glm = glm(undercount ~ . , data = Ericksen)
model_census_glm #same as lm() above since family not specified
##
## Call: glm(formula = undercount ~ ., data = Ericksen)
##
## Coefficients:
## (Intercept) minority crime poverty language
## -0.61141 0.07983 0.03012 -0.17837 0.21512
## highschool housing citystate conventional
## 0.06129 -0.03496 -1.15998 0.03699
##
## Degrees of Freedom: 65 Total (i.e. Null); 57 Residual
## Null Deviance: 396.8
## Residual Deviance: 116 AIC: 244.5
vif(model_census_glm)
## minority crime poverty language highschool
## 5.009065 3.343586 4.625178 1.635568 4.619169
## housing city conventional
## 1.871745 3.537750 1.691320